In [1]:
# importing necessary libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, learning_curve
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import VotingClassifier, AdaBoostClassifier
from sklearn.metrics import classification_report, roc_auc_score, roc_curve
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from sklearn.pipeline import Pipeline
C:\Users\HP\anaconda3\Lib\site-packages\pandas\core\arrays\masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed).
  from pandas.core import (
In [2]:
# Load data
df = pd.read_csv("wireless_churn.csv")
df.head()
Out[2]:
AccountWeeks ContractRenewal DataPlan DataUsage CustServCalls DayMins DayCalls MonthlyCharge OverageFee RoamMins Churn
0 128 1 1 2.7 1 265.1 110 89.0 9.87 10.0 0
1 107 1 1 3.7 1 161.6 123 82.0 9.78 13.7 0
2 137 1 0 0.0 0 243.4 114 52.0 6.06 12.2 0
3 84 0 0 0.0 2 299.4 71 57.0 3.10 6.6 0
4 75 0 0 0.0 3 166.7 113 41.0 7.42 10.1 0
In [3]:
# Create Profile Report
 
import ydata_profiling 
from ydata_profiling import ProfileReport

profile = ProfileReport(df, title="Wiseless_churn Profile Report")
profile.to_widgets()

profile.to_notebook_iframe()
C:\Users\HP\anaconda3\Lib\site-packages\numba\core\decorators.py:262: NumbaDeprecationWarning: numba.generated_jit is deprecated. Please see the documentation at: https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-generated-jit for more information and advice on a suitable replacement.
  warnings.warn(msg, NumbaDeprecationWarning)
C:\Users\HP\anaconda3\Lib\site-packages\visions\backends\shared\nan_handling.py:50: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  @nb.jit
Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]
Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]
Render widgets:   0%|          | 0/1 [00:00<?, ?it/s]
VBox(children=(Tab(children=(Tab(children=(GridBox(children=(VBox(children=(GridspecLayout(children=(HTML(valu…
Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]
In [9]:
# Step 1: Remove Outliers

iso = IsolationForest(contamination=0.02, random_state=100)
outliers = iso.fit_predict(df.drop('Churn', axis=1))
df_clean = df[outliers == 1]
In [10]:
# Step 2: Prepare Data

X = df_clean.drop("Churn", axis=1)
y = df_clean["Churn"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=100)
In [11]:
# Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
In [19]:
# Step 3: Learning Curves
%matplotlib inline

def plot_learning_curve(estimator, X, y, title):
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=5, scoring='recall_weighted',
        train_sizes=np.linspace(0.1, 1.0, 5), random_state=100
    )
    train_scores_mean = np.mean(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    plt.figure()
    plt.plot(train_sizes, train_scores_mean, 'o-', label="Training recall")
    plt.plot(train_sizes, test_scores_mean, 'o-', label="Cross-validation recall")
    plt.title(title)
    plt.xlabel("Training Examples")
    plt.ylabel("Recall")
    plt.legend(loc="best")
    plt.grid()
    plt.show()

log_reg = LogisticRegression(solver='lbfgs', class_weight='balanced', max_iter=1000, random_state=100)
nb = GaussianNB()

plot_learning_curve(log_reg, X_train_scaled, y_train, "Learning Curve - Logistic Regression")
plot_learning_curve(nb, X_train_scaled, y_train, "Learning Curve - Naive Bayes")
In [16]:
# Step 4: Optimized Models

log_reg.fit(X_train_scaled, y_train)
nb.fit(X_train_scaled, y_train)

y_pred_log = log_reg.predict(X_test_scaled)
y_pred_nb = nb.predict(X_test_scaled)

print("Logistic Regression Report:")
print(classification_report(y_test, y_pred_log))
print("ROC AUC:", roc_auc_score(y_test, log_reg.predict_proba(X_test_scaled)[:,1]))

print("\nNaive Bayes Report:")
print(classification_report(y_test, y_pred_nb))
print("ROC AUC:", roc_auc_score(y_test, nb.predict_proba(X_test_scaled)[:,1]))
Logistic Regression Report:
              precision    recall  f1-score   support

           0       0.95      0.78      0.86       562
           1       0.36      0.76      0.49        92

    accuracy                           0.78       654
   macro avg       0.66      0.77      0.67       654
weighted avg       0.87      0.78      0.81       654

ROC AUC: 0.8372466346897726

Naive Bayes Report:
              precision    recall  f1-score   support

           0       0.91      0.95      0.93       562
           1       0.59      0.45      0.51        92

    accuracy                           0.88       654
   macro avg       0.75      0.70      0.72       654
weighted avg       0.87      0.88      0.87       654

ROC AUC: 0.8573804734643354
In [20]:
# Step 5: Voting Ensemble

ada = AdaBoostClassifier(n_estimators=100, random_state=100)
ensemble = VotingClassifier(estimators=[
    ('lr', log_reg), ('ada', ada)
], voting='soft')

ensemble.fit(X_train_scaled, y_train)
y_pred_ens = ensemble.predict(X_test_scaled)

print("\nVoting Ensemble Report:")
print(classification_report(y_test, y_pred_ens))
print("ROC AUC:", roc_auc_score(y_test, ensemble.predict_proba(X_test_scaled)[:,1]))
Voting Ensemble Report:
              precision    recall  f1-score   support

           0       0.95      0.84      0.89       562
           1       0.42      0.71      0.53        92

    accuracy                           0.82       654
   macro avg       0.68      0.77      0.71       654
weighted avg       0.87      0.82      0.84       654

ROC AUC: 0.869507194801176
In [ ]: